//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
static const char* primitiveContactsKernelsCL= \
"#ifndef B3_CONTACT4DATA_H\n"
"#define B3_CONTACT4DATA_H\n"
"#ifndef B3_FLOAT4_H\n"
"#define B3_FLOAT4_H\n"
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
"#define B3_PLATFORM_DEFINITIONS_H\n"
"struct MyTest\n"
"{\n"
"	int bla;\n"
"};\n"
"#ifdef __cplusplus\n"
"#else\n"
"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n"
"#define B3_LARGE_FLOAT 1e18f\n"
"#define B3_INFINITY 1e18f\n"
"#define b3Assert(a)\n"
"#define b3ConstArray(a) __global const a*\n"
"#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#define B3_STATIC\n"
"#endif\n"
"#endif\n"
"#ifdef __cplusplus\n"
"#else\n"
"	typedef float4	b3Float4;\n"
"	#define b3Float4ConstArg const b3Float4\n"
"	#define b3MakeFloat4 (float4)\n"
"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
"	{\n"
"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
"		return dot(a1, b1);\n"
"	}\n"
"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
"	{\n"
"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
"		return cross(a1, b1);\n"
"	}\n"
"	#define b3MinFloat4 min\n"
"	#define b3MaxFloat4 max\n"
"	#define b3Normalized(a) normalize(a)\n"
"#endif \n"
"		\n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
"{\n"
"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n"
"		return false;\n"
"	return true;\n"
"}\n"
"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n"
"{\n"
"    float maxDot = -B3_INFINITY;\n"
"    int i = 0;\n"
"    int ptIndex = -1;\n"
"    for( i = 0; i < vecLen; i++ )\n"
"    {\n"
"        float dot = b3Dot3F4(vecArray[i],vec);\n"
"            \n"
"        if( dot > maxDot )\n"
"        {\n"
"            maxDot = dot;\n"
"            ptIndex = i;\n"
"        }\n"
"    }\n"
"	b3Assert(ptIndex>=0);\n"
"    if (ptIndex<0)\n"
"	{\n"
"		ptIndex = 0;\n"
"	}\n"
"    *dotOut = maxDot;\n"
"    return ptIndex;\n"
"}\n"
"#endif //B3_FLOAT4_H\n"
"typedef  struct b3Contact4Data b3Contact4Data_t;\n"
"struct b3Contact4Data\n"
"{\n"
"	b3Float4	m_worldPosB[4];\n"
"//	b3Float4	m_localPosA[4];\n"
"//	b3Float4	m_localPosB[4];\n"
"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n"
"	unsigned short  m_restituitionCoeffCmp;\n"
"	unsigned short  m_frictionCoeffCmp;\n"
"	int m_batchIdx;\n"
"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
"	int m_bodyBPtrAndSignBit;\n"
"	int	m_childIndexA;\n"
"	int	m_childIndexB;\n"
"	int m_unused1;\n"
"	int m_unused2;\n"
"};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n"
"	return (int)contact->m_worldNormalOnB.w;\n"
"};\n"
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
"{\n"
"	contact->m_worldNormalOnB.w = (float)numPoints;\n"
"};\n"
"#endif //B3_CONTACT4DATA_H\n"
"#define SHAPE_CONVEX_HULL 3\n"
"#define SHAPE_PLANE 4\n"
"#define SHAPE_CONCAVE_TRIMESH 5\n"
"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n"
"#define SHAPE_SPHERE 7\n"
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
"#ifdef cl_ext_atomic_counters_32\n"
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
"#else\n"
"#define counter32_t volatile __global int*\n"
"#endif\n"
"#define GET_GROUP_IDX get_group_id(0)\n"
"#define GET_LOCAL_IDX get_local_id(0)\n"
"#define GET_GLOBAL_IDX get_global_id(0)\n"
"#define GET_GROUP_SIZE get_local_size(0)\n"
"#define GET_NUM_GROUPS get_num_groups(0)\n"
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
"#define AtomInc(x) atom_inc(&(x))\n"
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
"#define AppendInc(x, out) out = atomic_inc(x)\n"
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
"#define max2 max\n"
"#define min2 min\n"
"typedef unsigned int u32;\n"
"typedef struct \n"
"{\n"
"	union\n"
"	{\n"
"		float4	m_min;\n"
"		float   m_minElems[4];\n"
"		int			m_minIndices[4];\n"
"	};\n"
"	union\n"
"	{\n"
"		float4	m_max;\n"
"		float   m_maxElems[4];\n"
"		int			m_maxIndices[4];\n"
"	};\n"
"} btAabbCL;\n"
"///keep this in sync with btCollidable.h\n"
"typedef struct\n"
"{\n"
"	int m_numChildShapes;\n"
"	float m_radius;\n"
"	int m_shapeType;\n"
"	int m_shapeIndex;\n"
"	\n"
"} btCollidableGpu;\n"
"typedef struct\n"
"{\n"
"	float4	m_childPosition;\n"
"	float4	m_childOrientation;\n"
"	int m_shapeIndex;\n"
"	int m_unused0;\n"
"	int m_unused1;\n"
"	int m_unused2;\n"
"} btGpuChildShape;\n"
"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n"
"typedef struct\n"
"{\n"
"	float4 m_pos;\n"
"	float4 m_quat;\n"
"	float4 m_linVel;\n"
"	float4 m_angVel;\n"
"	u32 m_collidableIdx;	\n"
"	float m_invMass;\n"
"	float m_restituitionCoeff;\n"
"	float m_frictionCoeff;\n"
"} BodyData;\n"
"typedef struct  \n"
"{\n"
"	float4		m_localCenter;\n"
"	float4		m_extents;\n"
"	float4		mC;\n"
"	float4		mE;\n"
"	\n"
"	float			m_radius;\n"
"	int	m_faceOffset;\n"
"	int m_numFaces;\n"
"	int	m_numVertices;\n"
"	\n"
"	int m_vertexOffset;\n"
"	int	m_uniqueEdgesOffset;\n"
"	int	m_numUniqueEdges;\n"
"	int m_unused;\n"
"} ConvexPolyhedronCL;\n"
"typedef struct\n"
"{\n"
"	float4 m_plane;\n"
"	int m_indexOffset;\n"
"	int m_numIndices;\n"
"} btGpuFace;\n"
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
"#define make_float4 (float4)\n"
"#define make_float2 (float2)\n"
"#define make_uint4 (uint4)\n"
"#define make_int4 (int4)\n"
"#define make_uint2 (uint2)\n"
"#define make_int2 (int2)\n"
"__inline\n"
"float fastDiv(float numerator, float denominator)\n"
"{\n"
"	return native_divide(numerator, denominator);	\n"
"//	return numerator/denominator;	\n"
"}\n"
"__inline\n"
"float4 fastDiv4(float4 numerator, float4 denominator)\n"
"{\n"
"	return native_divide(numerator, denominator);	\n"
"}\n"
"__inline\n"
"float4 cross3(float4 a, float4 b)\n"
"{\n"
"	return cross(a,b);\n"
"}\n"
"//#define dot3F4 dot\n"
"__inline\n"
"float dot3F4(float4 a, float4 b)\n"
"{\n"
"	float4 a1 = make_float4(a.xyz,0.f);\n"
"	float4 b1 = make_float4(b.xyz,0.f);\n"
"	return dot(a1, b1);\n"
"}\n"
"__inline\n"
"float4 fastNormalize4(float4 v)\n"
"{\n"
"	return fast_normalize(v);\n"
"}\n"
"///////////////////////////////////////\n"
"//	Quaternion\n"
"///////////////////////////////////////\n"
"typedef float4 Quaternion;\n"
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
"__inline\n"
"Quaternion qtNormalize(Quaternion in);\n"
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec);\n"
"__inline\n"
"Quaternion qtInvert(Quaternion q);\n"
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
"{\n"
"	Quaternion ans;\n"
"	ans = cross3( a, b );\n"
"	ans += a.w*b+b.w*a;\n"
"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
"	ans.w = a.w*b.w - dot3F4(a, b);\n"
"	return ans;\n"
"}\n"
"__inline\n"
"Quaternion qtNormalize(Quaternion in)\n"
"{\n"
"	return fastNormalize4(in);\n"
"//	in /= length( in );\n"
"//	return in;\n"
"}\n"
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec)\n"
"{\n"
"	Quaternion qInv = qtInvert( q );\n"
"	float4 vcpy = vec;\n"
"	vcpy.w = 0.f;\n"
"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
"	return out;\n"
"}\n"
"__inline\n"
"Quaternion qtInvert(Quaternion q)\n"
"{\n"
"	return (Quaternion)(-q.xyz, q.w);\n"
"}\n"
"__inline\n"
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
"{\n"
"	return qtRotate( qtInvert( q ), vec );\n"
"}\n"
"__inline\n"
"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
"{\n"
"	return qtRotate( *orientation, *p ) + (*translation);\n"
"}\n"
"void	trInverse(float4 translationIn, Quaternion orientationIn,\n"
"		float4* translationOut, Quaternion* orientationOut)\n"
"{\n"
"	*orientationOut = qtInvert(orientationIn);\n"
"	*translationOut = qtRotate(*orientationOut, -translationIn);\n"
"}\n"
"void	trMul(float4 translationA, Quaternion orientationA,\n"
"						float4 translationB, Quaternion orientationB,\n"
"		float4* translationOut, Quaternion* orientationOut)\n"
"{\n"
"	*orientationOut = qtMul(orientationA,orientationB);\n"
"	*translationOut = transform(&translationB,&translationA,&orientationA);\n"
"}\n"
"__inline\n"
"float4 normalize3(const float4 a)\n"
"{\n"
"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n"
"	return fastNormalize4( n );\n"
"}\n"
"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n"
"{\n"
"	return make_float4(	a.x + (b.x - a.x) * t,\n"
"						a.y + (b.y - a.y) * t,\n"
"						a.z + (b.z - a.z) * t,\n"
"						0.f);\n"
"}\n"
"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n"
"{\n"
"	float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n"
"	float dist = dot3F4(n, point) + planeEqn.w;\n"
"	*closestPointOnFace = point - dist * n;\n"
"	return dist;\n"
"}\n"
"// Tests point p against each edge of the polygon described by face, using the in-plane\n"
"// edge normal cross(edge, faceNormal).\n"
"//   p              query point, expressed like the face vertices\n"
"//   face           m_plane.xyz is the face normal; m_indexOffset/m_numIndices select the polygon\n"
"//   baseVertex     vertex array that the index entries are relative to\n"
"//   convexIndices  index buffer\n"
"//   out            set to the closest point to p on the first edge that has p on its outer side\n"
"// Returns true when no edge has p on its outer side (out is then left untouched).\n"
"// Returns false otherwise; for a face with fewer than two indices it returns false\n"
"// without writing out.\n"
"inline bool IsPointInPolygon(float4 p, \n"
"							const btGpuFace* face,\n"
"							__global const float4* baseVertex,\n"
"							__global const  int* convexIndices,\n"
"							float4* out)\n"
"{\n"
"	if (face->m_numIndices<2)\n"
"		return false;\n"
"	float4 faceNormal = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n"
"	// Start from the last vertex so the first edge tested is the closing edge (last -> first).\n"
"	float4 b = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n"
"	for (int i=0; i<face->m_numIndices; ++i)\n"
"	{\n"
"		float4 a = b;\n"
"		b = baseVertex[convexIndices[face->m_indexOffset + i]];\n"
"		float4 ab = b-a;\n"
"		float4 ap = p-a;\n"
"		float4 edgeNormal = cross3(ab,faceNormal);\n"
"		if (dot(ap, edgeNormal) > 0.f)\n"
"		{\n"
"			// p is outside this edge: clamp its projection onto the segment a-b.\n"
"			float ab_m2 = dot(ab, ab);\n"
"			float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n"
"			if (rt <= 0.f)\n"
"			{\n"
"				*out = a;\n"
"			}\n"
"			else if (rt >= 1.f)\n"
"			{\n"
"				*out = b;\n"
"			}\n"
"			else\n"
"			{\n"
"				float s = 1.f - rt;\n"
"				// Assign all four lanes: the caller passes an uninitialised float4 and reads\n"
"				// the whole vector, so w must not be left indeterminate.\n"
"				*out = make_float4(s * a.x + rt * b.x,\n"
"									s * a.y + rt * b.y,\n"
"									s * a.z + rt * b.z,\n"
"									0.f);\n"
"			}\n"
"			return false;\n"
"		}\n"
"	}\n"
"	return true;\n"
"}\n"
"// Sphere versus convex-hull contact generation for one pair.\n"
"// The sphere centre spherePos2 is transformed by the inverse of (pos, quat) into the hull\n"
"// frame and tested against every face of the hull selected by\n"
"// collidables[collidableIndexB].m_shapeIndex. When the sphere is within radius of the\n"
"// hull, a single contact is appended to globalContactsOut.\n"
"//   pairIndex           stored in the contact as m_batchIdx\n"
"//   bodyIndexA/B        stored in the contact, negated when that body's m_invMass is 0\n"
"//   collidableIndexA    not read by this function\n"
"//   nGlobalContactsOut  counter incremented atomically to reserve an output slot\n"
"//   maxContactCapacity  number of entries available in globalContactsOut\n"
"//   spherePos2, radius  sphere centre and radius\n"
"//   pos, quat           translation and orientation of the hull\n"
"void	computeContactSphereConvex(int pairIndex,\n"
"								int bodyIndexA, int bodyIndexB, \n"
"								int collidableIndexA, int collidableIndexB, \n"
"								__global const BodyData* rigidBodies, \n"
"								__global const btCollidableGpu* collidables,\n"
"								__global const ConvexPolyhedronCL* convexShapes,\n"
"								__global const float4* convexVertices,\n"
"								__global const int* convexIndices,\n"
"								__global const btGpuFace* faces,\n"
"								__global struct b3Contact4Data* restrict globalContactsOut,\n"
"								counter32_t nGlobalContactsOut,\n"
"								int maxContactCapacity,\n"
"								float4 spherePos2,\n"
"								float radius,\n"
"								float4 pos,\n"
"								float4 quat\n"
"								)\n"
"{\n"
"	float4 invPos;\n"
"	float4 invOrn;\n"
"	trInverse(pos,quat, &invPos,&invOrn);\n"
"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n"
"	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n"
"	int numFaces = convexShapes[shapeIndex].m_numFaces;\n"
"	float4 closestPnt = (float4)(0, 0, 0, 0);\n"
"	// Expressed in the hull frame while the faces are scanned; rotated by quat afterwards.\n"
"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n"
"	float minDist = -1000000.f;\n"
"	bool bCollide = true;\n"
"	for ( int f = 0; f < numFaces; f++ )\n"
"	{\n"
"		btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n"
"		float4 planeEqn = face.m_plane;\n"
"		float4 pntReturn;\n"
"		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n"
"		// More than one radius in front of a face: that face plane separates the shapes.\n"
"		if ( dist > radius )\n"
"		{\n"
"			bCollide = false;\n"
"			break;\n"
"		}\n"
"		if (dist>0)\n"
"		{\n"
"			// The centre is in front of this face, so the nearest feature may be an edge or vertex.\n"
"			// Seed out with the plane projection so it holds a defined value even when\n"
"			// IsPointInPolygon returns false without writing it.\n"
"			float4 out = pntReturn;\n"
"			bool isInPoly = IsPointInPolygon(spherePos,\n"
"					&face,\n"
"					&convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n"
"					convexIndices,\n"
"					&out);\n"
"			if (isInPoly)\n"
"			{\n"
"				if (dist>minDist)\n"
"				{\n"
"					minDist = dist;\n"
"					closestPnt = pntReturn;\n"
"					hitNormalWorld = planeEqn;\n"
"				}\n"
"			} else\n"
"			{\n"
"				float4 tmp = spherePos-out;\n"
"				// Measure the distance over xyz only; the w lanes are not spatial coordinates.\n"
"				float l2 = dot3F4(tmp,tmp);\n"
"				if (l2<radius*radius)\n"
"				{\n"
"					dist  = sqrt(l2);\n"
"					if (dist>minDist)\n"
"					{\n"
"						minDist = dist;\n"
"						closestPnt = out;\n"
"						hitNormalWorld = tmp/dist;\n"
"					}\n"
"				} else\n"
"				{\n"
"					bCollide = false;\n"
"					break;\n"
"				}\n"
"			}\n"
"		} else\n"
"		{\n"
"			if ( dist > minDist )\n"
"			{\n"
"				minDist = dist;\n"
"				closestPnt = pntReturn;\n"
"				hitNormalWorld.xyz = planeEqn.xyz;\n"
"			}\n"
"		}\n"
"	}\n"
"	\n"
"	if (bCollide && minDist > -10000)\n"
"	{\n"
"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n"
"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n"
"		float actualDepth = minDist-radius;\n"
"		if (actualDepth<=0.f)\n"
"		{\n"
"			pOnB1.w = actualDepth;\n"
"			int dstIdx;\n"
"			AppendInc( nGlobalContactsOut, dstIdx );\n"
"			// Drop the contact when the output buffer is full instead of writing past its end.\n"
"			if (dstIdx < maxContactCapacity)\n"
"			{\n"
"				__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
"				c->m_worldNormalOnB = -normalOnSurfaceB1;\n"
"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
"				c->m_batchIdx = pairIndex;\n"
"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
"				c->m_worldPosB[0] = pOnB1;\n"
"				c->m_childIndexA = -1;\n"
"				c->m_childIndexB = -1;\n"
"				GET_NPOINTS(*c) = 1;\n"
"			} \n"
"		}\n"
"	}//if (hasCollision)\n"
"}\n"
"							\n"
/*
 * extractManifoldSequential (embedded OpenCL source; these C comments sit between the
 * string tokens and are not part of the kernel text).
 *
 * Picks up to four point indices out of a contact point set.
 *   p           candidate points; only xyz takes part in the direction tests, while
 *               p[i].w is compared to find the smallest w
 *   nPoints     number of candidates; values above 64 are clamped to 64
 *   nearNormal  used only to build the two sampling directions u and v
 *   contactIdx  output indices; lanes are written only when nPoints > 4, and a lane that
 *               is never selected keeps whatever value the caller stored in it
 * Returns 0 for an empty set, nPoints when nPoints <= 4 (contactIdx untouched), else 4.
 */
"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n"
"{\n"
"	if( nPoints == 0 )\n"
"        return 0;\n"
"    \n"
"    if (nPoints <=4)\n"
"        return nPoints;\n"
"    \n"
"    \n"
"    if (nPoints >64)\n"
"        nPoints = 64;\n"
"    \n"
/* center is the component-wise mean of the first nPoints points (w included). */
"	float4 center = make_float4(0.f);\n"
"	{\n"
"		\n"
"		for (int i=0;i<nPoints;i++)\n"
"			center += p[i];\n"
"		center /= (float)nPoints;\n"
"	}\n"
"    \n"
"	\n"
"    \n"
"	//	sample 4 directions\n"
"    \n"
/* u = nearNormal x (p[0] - center), v = nearNormal x u, both passed through normalize3. */
"    float4 aVector = p[0] - center;\n"
"    float4 u = cross3( nearNormal, aVector );\n"
"    float4 v = cross3( nearNormal, u );\n"
"    u = normalize3( u );\n"
"    v = normalize3( v );\n"
"    \n"
"    \n"
"    //keep point with deepest penetration\n"
"    float minW= FLT_MAX;\n"
"    \n"
"    int minIndex=-1;\n"
"    \n"
/*
 * Despite the name, each lane of maxDots tracks the smallest projection seen so far
 * (the tests below use '<'). The lanes start at FLT_MIN, so a point is selected for a
 * direction only when its projection is below FLT_MIN.
 */
"    float4 maxDots;\n"
"    maxDots.x = FLT_MIN;\n"
"    maxDots.y = FLT_MIN;\n"
"    maxDots.z = FLT_MIN;\n"
"    maxDots.w = FLT_MIN;\n"
"    \n"
"    //	idx, distance\n"
"    for(int ie = 0; ie<nPoints; ie++ )\n"
"    {\n"
"        if (p[ie].w<minW)\n"
"        {\n"
"            minW = p[ie].w;\n"
"            minIndex=ie;\n"
"        }\n"
"        float f;\n"
"        float4 r = p[ie]-center;\n"
/* Lanes x, y, z, w correspond to the directions +u, -u, +v, -v respectively. */
"        f = dot3F4( u, r );\n"
"        if (f<maxDots.x)\n"
"        {\n"
"            maxDots.x = f;\n"
"            contactIdx[0].x = ie;\n"
"        }\n"
"        \n"
"        f = dot3F4( -u, r );\n"
"        if (f<maxDots.y)\n"
"        {\n"
"            maxDots.y = f;\n"
"            contactIdx[0].y = ie;\n"
"        }\n"
"        \n"
"        \n"
"        f = dot3F4( v, r );\n"
"        if (f<maxDots.z)\n"
"        {\n"
"            maxDots.z = f;\n"
"            contactIdx[0].z = ie;\n"
"        }\n"
"        \n"
"        f = dot3F4( -v, r );\n"
"        if (f<maxDots.w)\n"
"        {\n"
"            maxDots.w = f;\n"
"            contactIdx[0].w = ie;\n"
"        }\n"
"        \n"
"    }\n"
"    \n"
/* Make sure the point with the smallest w is among the four by overwriting lane x if needed. */
"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n"
"    {\n"
"        //replace the first contact with minimum (todo: replace contact with least penetration)\n"
"        contactIdx[0].x = minIndex;\n"
"    }\n"
"    \n"
"    return 4;\n"
"    \n"
"}\n"
"#define MAX_PLANE_CONVEX_POINTS 64\n"
"// Plane (body A) versus convex hull (body B) contact generation for one pair.\n"
"// Every hull vertex with a negative signed distance to the plane becomes a candidate\n"
"// contact point (world position, w = that distance). At most MAX_PLANE_CONVEX_POINTS\n"
"// candidates are kept; more than four are reduced with extractManifoldSequential.\n"
"//   pairIndex           stored in the contact as m_batchIdx\n"
"//   bodyIndexA          body owning the plane; its pose is read from rigidBodies\n"
"//   bodyIndexB          body owning the hull\n"
"//   collidableIndexA    its m_shapeIndex selects the plane equation in faces\n"
"//   collidableIndexB    its m_shapeIndex selects the hull in convexShapes\n"
"//   convexIndices       not read by this function\n"
"//   nGlobalContactsOut  counter incremented atomically to reserve an output slot\n"
"//   maxContactCapacity  number of entries available in globalContactsOut\n"
"//   posB, ornB          translation and orientation of the hull\n"
"// Returns the index of the contact written to globalContactsOut, or -1 when nothing was\n"
"// written (no candidate point, or the reserved slot is beyond maxContactCapacity).\n"
"int computeContactPlaneConvex(int pairIndex,\n"
"								int bodyIndexA, int bodyIndexB, \n"
"								int collidableIndexA, int collidableIndexB, \n"
"								__global const BodyData* rigidBodies, \n"
"								__global const btCollidableGpu*collidables,\n"
"								__global const ConvexPolyhedronCL* convexShapes,\n"
"								__global const float4* convexVertices,\n"
"								__global const int* convexIndices,\n"
"								__global const btGpuFace* faces,\n"
"								__global struct b3Contact4Data* restrict globalContactsOut,\n"
"								counter32_t nGlobalContactsOut,\n"
"								int maxContactCapacity,\n"
"								float4 posB,\n"
"								Quaternion ornB\n"
"								)\n"
"{\n"
"	int resultIndex=-1;\n"
"	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n"
"	__global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n"
"	\n"
"	float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
"	Quaternion ornA = rigidBodies[bodyIndexA].m_quat;\n"
"	float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n"
"	float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n"
"	float4 planeNormalWorld = qtRotate(ornA,planeNormal);\n"
"	float planeConstant = planeEq.w;\n"
"	\n"
"	// Inverse pose of the plane body, used to bring world-space vertices into the plane frame.\n"
"	float4 invPosA;Quaternion invOrnA;\n"
"	trInverse(posA,ornA,&invPosA,&invOrnA);\n"
"	// Rotation taking plane-frame directions into the hull frame: inverse(ornB) * ornA.\n"
"	Quaternion planeInConvexOrn1 = qtMul(qtInvert(ornB),ornA);\n"
"	float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n"
"	\n"
"	float maxDot = -1e30f;\n"
"	float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n"
"	int numPoints = 0;\n"
"	// Default indices, used as-is when four or fewer points are found.\n"
"	int4 contactIdx = make_int4(0,1,2,3);\n"
"	\n"
"	for (int i=0;i<hullB->m_numVertices;i++)\n"
"	{\n"
"		float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n"
"		float curDot = dot(vtx,planeNormalInConvex);\n"
"		if (curDot>maxDot)\n"
"		{\n"
"			maxDot=curDot;\n"
"			//make sure the deepest points is always included\n"
"			// (frees the last slot so this vertex can still be stored when the buffer is full)\n"
"			if (numPoints==MAX_PLANE_CONVEX_POINTS)\n"
"				numPoints--;\n"
"		}\n"
"		if (numPoints<MAX_PLANE_CONVEX_POINTS)\n"
"		{\n"
"			float4 vtxWorld = transform(&vtx, &posB, &ornB);\n"
"			float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);\n"
"			float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n"
"			if (dist<0.f)\n"
"			{\n"
"				vtxWorld.w = dist;\n"
"				contactPoints[numPoints] = vtxWorld;\n"
"				numPoints++;\n"
"			}\n"
"		}\n"
"	}\n"
"	int numReducedPoints  = numPoints;\n"
"	if (numPoints>4)\n"
"	{\n"
"		// contactPoints hold world-space positions, so the sampling directions must be\n"
"		// built from a world-space normal rather than the hull-local one.\n"
"		numReducedPoints = extractManifoldSequential( contactPoints, numPoints, -planeNormalWorld, &contactIdx);\n"
"	}\n"
"	if (numReducedPoints>0)\n"
"	{\n"
"		int dstIdx;\n"
"		AppendInc( nGlobalContactsOut, dstIdx );\n"
"		if (dstIdx < maxContactCapacity)\n"
"		{\n"
"			resultIndex = dstIdx;\n"
"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
"			c->m_worldNormalOnB = -planeNormalWorld;\n"
"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
"			c->m_batchIdx = pairIndex;\n"
"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
"			c->m_childIndexA = -1;\n"
"			c->m_childIndexB = -1;\n"
"			// Each case deliberately falls through so that N points fill slots N-1 .. 0.\n"
"			switch (numReducedPoints)\n"
"			{\n"
"				case 4:\n"
"					c->m_worldPosB[3] = contactPoints[contactIdx.w];\n"
"					// fall through\n"
"				case 3:\n"
"					c->m_worldPosB[2] = contactPoints[contactIdx.z];\n"
"					// fall through\n"
"				case 2:\n"
"					c->m_worldPosB[1] = contactPoints[contactIdx.y];\n"
"					// fall through\n"
"				case 1:\n"
"					c->m_worldPosB[0] = contactPoints[contactIdx.x];\n"
"					// fall through\n"
"				default:\n"
"				{\n"
"				}\n"
"			};\n"
"			\n"
"			GET_NPOINTS(*c) = numReducedPoints;\n"
"		}//if (dstIdx < maxContactCapacity)\n"
"	}	\n"
"	return resultIndex;\n"
"}\n"
"void	computeContactPlaneSphere(int pairIndex,\n"
"																int bodyIndexA, int bodyIndexB, \n"
"																int collidableIndexA, int collidableIndexB, \n"
"																__global const BodyData* rigidBodies, \n"
"																__global const btCollidableGpu* collidables,\n"
"																__global const btGpuFace* faces,\n"
"																__global struct b3Contact4Data* restrict globalContactsOut,\n"
"																counter32_t nGlobalContactsOut,\n"
"																int maxContactCapacity)\n"
"{\n"
"	float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n"
"	float radius = collidables[collidableIndexB].m_radius;\n"
"	float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n"
"	float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n"
"	float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n"
"	float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n"
"	\n"
"	bool hasCollision = false;\n"
"	float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n"
"	float planeConstant = planeEq.w;\n"
"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n"
"	{\n"
"		float4 invPosA;Quaternion invOrnA;\n"
"		trInverse(posA1,ornA1,&invPosA,&invOrnA);\n"
"		trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n"
"	}\n"
"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n"
"	{\n"
"		float4 invPosB;Quaternion invOrnB;\n"
"		trInverse(posB1,ornB1,&invPosB,&invOrnB);\n"
"		trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1);	\n"
"	}\n"
"	float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n"
"	float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n"
"	float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n"
"	hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n"
"	if (hasCollision)\n"
"	{\n"
"		float4 vtxInPlaneProjected1 = vtxInPlane1 -   distance*planeNormal1;\n"
"		float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n"
"		float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n"
"		float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n"
"		pOnB1.w = distance;\n"
"		int dstIdx;\n"
"    AppendInc( nGlobalContactsOut, dstIdx );\n"
"		\n"
"		if (dstIdx < maxContactCapacity)\n"
"		{\n"
"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
"			c->m_worldNormalOnB = -normalOnSurfaceB1;\n"
"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
"			c->m_batchIdx = pairIndex;\n"
"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
"			c->m_worldPosB[0] = pOnB1;\n"
"			c->m_childIndexA = -1;\n"
"			c->m_childIndexB = -1;\n"
"			GET_NPOINTS(*c) = 1;\n"
"		}//if (dstIdx < numPairs)\n"
"	}//if (hasCollision)\n"
"}\n"
"// Narrow-phase kernel for primitive shape pairs; one work-item per entry of pairs.\n"
"// pairs[i].x and pairs[i].y are body indices. Handled shape combinations, in either\n"
"// order: plane / convex hull, plane / sphere, sphere / convex hull; plus sphere / sphere.\n"
"// Any other combination produces no contact.\n"
"// Contacts are appended to globalContactsOut through the nGlobalContactsOut counter and\n"
"// are dropped when the reserved slot is not below maxContactCapacity.\n"
"// For plane / convex hull pairs the index of the written contact is stored in pairs[i].z.\n"
"// uniqueEdges is not read by this kernel.\n"
"__kernel void   primitiveContactsKernel( __global int4* pairs, \n"
"										__global const BodyData* rigidBodies, \n"
"										__global const btCollidableGpu* collidables,\n"
"										__global const ConvexPolyhedronCL* convexShapes, \n"
"										__global const float4* vertices,\n"
"										__global const float4* uniqueEdges,\n"
"										__global const btGpuFace* faces,\n"
"										__global const int* indices,\n"
"										__global struct b3Contact4Data* restrict globalContactsOut,\n"
"										counter32_t nGlobalContactsOut,\n"
"										int numPairs, int maxContactCapacity)\n"
"{\n"
"	int i = get_global_id(0);\n"
"	if (i>=numPairs)\n"
"		return;\n"
"	int pairIndex = i;\n"
"	int bodyIndexA = pairs[i].x;\n"
"	int bodyIndexB = pairs[i].y;\n"
"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
"	// Read each shape type once instead of once per comparison below.\n"
"	int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n"
"	int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
"	\n"
"	if (shapeTypeA == SHAPE_PLANE && shapeTypeB == SHAPE_CONVEX_HULL)\n"
"	{\n"
"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
"		Quaternion ornB = rigidBodies[bodyIndexB].m_quat;\n"
"		int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n"
"								rigidBodies,collidables,convexShapes,vertices,indices,\n"
"								faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n"
"		if (contactIndex>=0)\n"
"			pairs[pairIndex].z = contactIndex;\n"
"		return;\n"
"	}\n"
"	if (shapeTypeA == SHAPE_CONVEX_HULL && shapeTypeB == SHAPE_PLANE)\n"
"	{\n"
"		// Same test with the roles swapped so that the plane is the first body.\n"
"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
"		Quaternion ornA = rigidBodies[bodyIndexA].m_quat;\n"
"		int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n"
"								rigidBodies,collidables,convexShapes,vertices,indices,\n"
"								faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n"
"		if (contactIndex>=0)\n"
"			pairs[pairIndex].z = contactIndex;\n"
"		return;\n"
"	}\n"
"	if (shapeTypeA == SHAPE_PLANE && shapeTypeB == SHAPE_SPHERE)\n"
"	{\n"
"		computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n"
"								rigidBodies,collidables,faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n"
"		return;\n"
"	}\n"
"	if (shapeTypeA == SHAPE_SPHERE && shapeTypeB == SHAPE_PLANE)\n"
"	{\n"
"		computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n"
"								rigidBodies,collidables,\n"
"								faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n"
"		return;\n"
"	}\n"
"	if (shapeTypeA == SHAPE_SPHERE && shapeTypeB == SHAPE_CONVEX_HULL)\n"
"	{\n"
"		float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n"
"		float sphereRadius = collidables[collidableIndexA].m_radius;\n"
"		float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n"
"		float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n"
"		computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n"
"								rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
"								spherePos,sphereRadius,convexPos,convexOrn);\n"
"		return;\n"
"	}\n"
"	if (shapeTypeA == SHAPE_CONVEX_HULL && shapeTypeB == SHAPE_SPHERE)\n"
"	{\n"
"		float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n"
"		float sphereRadius = collidables[collidableIndexB].m_radius;\n"
"		float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n"
"		float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n"
"		computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n"
"								rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
"								spherePos,sphereRadius,convexPos,convexOrn);\n"
"		return;\n"
"	}\n"
"	if (shapeTypeA == SHAPE_SPHERE && shapeTypeB == SHAPE_SPHERE)\n"
"	{\n"
"		//sphere-sphere\n"
"		float radiusA = collidables[collidableIndexA].m_radius;\n"
"		float radiusB = collidables[collidableIndexB].m_radius;\n"
"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
"		float4 diff = posA-posB;\n"
"		float len = length(diff);\n"
"		\n"
"		///iff distance positive, don't generate a new contact\n"
"		if ( len <= (radiusA+radiusB))\n"
"		{\n"
"			///distance (negative means penetration)\n"
"			float dist = len - (radiusA+radiusB);\n"
"			// Fallback normal for (nearly) coincident centres, where diff cannot be normalised.\n"
"			float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n"
"			if (len > 0.00001)\n"
"			{\n"
"				normalOnSurfaceB = diff / len;\n"
"			}\n"
"			float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n"
"			contactPosB.w = dist;\n"
"			\n"
"			int dstIdx;\n"
"			AppendInc( nGlobalContactsOut, dstIdx );\n"
"			\n"
"			if (dstIdx < maxContactCapacity)\n"
"			{\n"
"				__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
"				c->m_worldNormalOnB = normalOnSurfaceB;\n"
"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
"				c->m_batchIdx = pairIndex;\n"
"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
"				c->m_worldPosB[0] = contactPosB;\n"
"				c->m_childIndexA = -1;\n"
"				c->m_childIndexB = -1;\n"
"				GET_NPOINTS(*c) = 1;\n"
"			}//if (dstIdx < maxContactCapacity)\n"
"		}//if ( len <= (radiusA+radiusB))\n"
"		return;\n"
"	}//SHAPE_SPHERE SHAPE_SPHERE\n"
"}\n"
// processCompoundPairsPrimitivesKernel -- embedded OpenCL source (the kernel text itself is tagged work-in-progress).
// Each work-item whose id is below numCompoundPairs handles one entry of gpuCompoundPairs:
//   .x/.y = body indices A/B, .z/.w = child shape indices A/B (negative: no child, use the body's own collidable).
// When a child index is given, the child's position/orientation from gpuChildShapes is composed with the
// parent body's pose, so posA/ornA (posB/ornB) describe the child instead of the parent.
// The pair is then dispatched on shape type:
//   plane  vs convex hull -> computeContactPlaneConvex
//   sphere vs convex hull -> computeContactSphereConvex
// Any other combination produces no contact in this kernel.
// uniqueEdges and aabbs are accepted but never read here.
"// work-in-progress\n"
"__kernel void   processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n"
"													__global const BodyData* rigidBodies, \n"
"													__global const btCollidableGpu* collidables,\n"
"													__global const ConvexPolyhedronCL* convexShapes, \n"
"													__global const float4* vertices,\n"
"													__global const float4* uniqueEdges,\n"
"													__global const btGpuFace* faces,\n"
"													__global const int* indices,\n"
"													__global btAabbCL* aabbs,\n"
"													__global const btGpuChildShape* gpuChildShapes,\n"
"													__global struct b3Contact4Data* restrict globalContactsOut,\n"
"													counter32_t nGlobalContactsOut,\n"
"													int numCompoundPairs, int maxContactCapacity\n"
"													)\n"
"{\n"
"	int i = get_global_id(0);\n"
"	if (i<numCompoundPairs)\n"
"	{\n"
"		int bodyIndexA = gpuCompoundPairs[i].x;\n"
"		int bodyIndexB = gpuCompoundPairs[i].y;\n"
"		int childShapeIndexA = gpuCompoundPairs[i].z;\n"
"		int childShapeIndexB = gpuCompoundPairs[i].w;\n"
"		\n"
"		int collidableIndexA = -1;\n"
"		int collidableIndexB = -1;\n"
"		\n"
"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
"		\n"
"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n"
"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
"							\n"
// Side A: fold the child transform into (posA, ornA), or fall back to the body's collidable.
"		if (childShapeIndexA >= 0)\n"
"		{\n"
"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n"
"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n"
"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n"
"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n"
"			float4 newOrnA = qtMul(ornA,childOrnA);\n"
"			posA = newPosA;\n"
"			ornA = newOrnA;\n"
"		} else\n"
"		{\n"
"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
"		}\n"
"		\n"
// Side B: same composition, written with transform().
// NOTE(review): presumably transform() is equivalent to the qtRotate()+pos form used for side A -- confirm.
"		if (childShapeIndexB>=0)\n"
"		{\n"
"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n"
"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n"
"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
"			float4 newOrnB = qtMul(ornB,childOrnB);\n"
"			posB = newPosB;\n"
"			ornB = newOrnB;\n"
"		} else\n"
"		{\n"
"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n"
"		}\n"
"	\n"
// The unused shapeIndexA/shapeIndexB locals were removed; only the shape types drive the dispatch.
"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n"
"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
"		int pairIndex = i;\n"
"		if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n"
"		{\n"
"			computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB,  collidableIndexA,collidableIndexB, \n"
"																rigidBodies,collidables,convexShapes,vertices,indices,\n"
"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n"
"			return;\n"
"		}\n"
"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n"
"		{\n"
"			computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n"
"																rigidBodies,collidables,convexShapes,vertices,indices,\n"
"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n"
"			return;\n"
"		}\n"
"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n"
"		{\n"
// Fix: the sphere centre is posB, which already carries the child offset when childShapeIndexB >= 0
// and equals rigidBodies[bodyIndexB].m_pos otherwise. Reading m_pos directly placed every child
// sphere at the origin of its compound body.
"			float4 spherePos = posB;\n"
"			float sphereRadius = collidables[collidableIndexB].m_radius;\n"
"			float4 convexPos = posA;\n"
"			float4 convexOrn = ornA;\n"
"			\n"
"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n"
"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
"										spherePos,sphereRadius,convexPos,convexOrn);\n"
"	\n"
"			return;\n"
"		}\n"
"		if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n"
"		{\n"
// Fix (mirror of the branch above): use the composed posA for the sphere centre.
"			float4 spherePos = posA;\n"
"			float sphereRadius = collidables[collidableIndexA].m_radius;\n"
"			float4 convexPos = posB;\n"
"			float4 convexOrn = ornB;\n"
"			\n"
"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n"
"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
"										spherePos,sphereRadius,convexPos,convexOrn);\n"
"	\n"
"			return;\n"
"		}\n"
"	}//	if (i<numCompoundPairs)\n"
"}\n"
// pointInTriangle -- embedded OpenCL source.
// vertices: the three triangle corners (vertices[0..2]); normal: the triangle normal; p: the point to test.
// For each edge k the function builds cross(edge_k, normal) and takes its dot product with
// (p - start of edge k), giving r1..r3. It returns true when all three are strictly positive or
// all three are <= 0, i.e. p is on the same side of every edge; either winding is accepted.
// A mix of exactly-zero and positive values matches neither test and returns false.
// Nothing is written through any of the pointers.
"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n"
"{\n"
"	const float4* p1 = &vertices[0];\n"
"	const float4* p2 = &vertices[1];\n"
"	const float4* p3 = &vertices[2];\n"
// Edges taken in cyclic order p1->p2->p3->p1.
"	float4 edge1;	edge1 = (*p2 - *p1);\n"
"	float4 edge2;	edge2 = ( *p3 - *p2 );\n"
"	float4 edge3;	edge3 = ( *p1 - *p3 );\n"
"	\n"
"	float4 p1_to_p; p1_to_p = ( *p - *p1 );\n"
"	float4 p2_to_p; p2_to_p = ( *p - *p2 );\n"
"	float4 p3_to_p; p3_to_p = ( *p - *p3 );\n"
// In-plane direction perpendicular to each edge.
"	float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n"
"	float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n"
"	float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n"
"	\n"
"	\n"
// Signed offset of p from each edge along that perpendicular.
"	float r1, r2, r3;\n"
"	r1 = dot(edge1_normal,p1_to_p );\n"
"	r2 = dot(edge2_normal,p2_to_p );\n"
"	r3 = dot(edge3_normal,p3_to_p );\n"
"	\n"
"	if ( r1 > 0 && r2 > 0 && r3 > 0 )\n"
"		return true;\n"
"    if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n"
"		return true;\n"
"	return false;\n"
"}\n"
// segmentSqrDistance -- embedded OpenCL source.
// Finds the point on the segment [from,to] closest to p, stores it in *nearest and returns the
// squared distance between p and that point.
// t starts as dot(to-from, p-from), the unnormalised projection parameter:
//   t <= 0               -> clamp to `from` (t = 0)
//   0 < t < |to-from|^2  -> interior point, t is divided by |to-from|^2
//   t >= |to-from|^2     -> clamp to `to` (t = 1)
// A zero-length segment yields t == 0, so the division is never reached and *nearest == from.
// dot() is the 4-component built-in, so any difference in .w contributes to the result.
"float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n"
"{\n"
"	float4 diff = p - from;\n"
"	float4 v = to - from;\n"
"	float t = dot(v,diff);\n"
"	\n"
"	if (t > 0) \n"
"	{\n"
"		float dotVV = dot(v,v);\n"
"		if (t < dotVV) \n"
"		{\n"
"			t /= dotVV;\n"
// diff becomes the vector from the projected point to p.
"			diff -= t*v;\n"
"		} else \n"
"		{\n"
"			t = 1;\n"
"			diff -= v;\n"
"		}\n"
"	} else\n"
"	{\n"
"		t = 0;\n"
"	}\n"
"	*nearest = from + t*v;\n"
"	return dot(diff,diff);	\n"
"}\n"
// computeContactSphereTriangle -- embedded OpenCL source.
// Tests a sphere (centre spherePos2, given radius) against a single triangle and appends at most
// one contact to globalContactsOut.
//   triangleVertices : the three corners; presumably expressed in the frame of the body whose pose
//                      is (pos, quat), since the sphere centre is moved into that frame first -- confirm.
//   bodyIndexA/B     : written into the contact, negated when the body's m_invMass is 0.
//   faceIndex        : stored as m_childIndexB of the contact.
//   collidableIndexA/B and collidables are accepted but not read here.
// Steps:
//   1. move the sphere centre into the triangle's frame (trInverse + transform)
//   2. build the triangle normal and flip it towards the side the sphere centre is on
//   3. if the centre is closer to the plane than the radius, the contact point is the plane
//      projection when pointInTriangle() accepts the centre, otherwise the nearest edge point
//      that lies within the radius
//   4. map the result back with (pos, quat); when the depth is <= 0, reserve a slot via AppendInc
//      and fill it if the slot index is below maxContactCapacity
// NOTE(review): when the centre lies on the triangle (minDist <= FLT_EPSILON) no contact is emitted,
// although hitNormalWorld already holds the plane normal at that point.
"void	computeContactSphereTriangle(int pairIndex,\n"
"									int bodyIndexA, int bodyIndexB,\n"
"									int collidableIndexA, int collidableIndexB, \n"
"									__global const BodyData* rigidBodies, \n"
"									__global const btCollidableGpu* collidables,\n"
"									const float4* triangleVertices,\n"
"									__global struct b3Contact4Data* restrict globalContactsOut,\n"
"									counter32_t nGlobalContactsOut,\n"
"									int maxContactCapacity,\n"
"									float4 spherePos2,\n"
"									float radius,\n"
"									float4 pos,\n"
"									float4 quat,\n"
"									int faceIndex\n"
"									)\n"
"{\n"
"	float4 invPos;\n"
"	float4 invOrn;\n"
"	trInverse(pos,quat, &invPos,&invOrn);\n"
"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n"
"	int numFaces = 3;\n"
"	float4 closestPnt = (float4)(0, 0, 0, 0);\n"
"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n"
"	float minDist = -1000000.f;\n"
"	bool bCollide = false;\n"
"	\n"
"	//////////////////////////////////////\n"
"	float4 sphereCenter;\n"
"	sphereCenter = spherePos;\n"
"	const float4* vertices = triangleVertices;\n"
"	float contactBreakingThreshold = 0.f;//todo?\n"
"	float radiusWithThreshold = radius + contactBreakingThreshold;\n"
"	float4 edge10;\n"
"	edge10 = vertices[1]-vertices[0];\n"
"	edge10.w = 0.f;//is this needed?\n"
"	float4 edge20;\n"
"	edge20 = vertices[2]-vertices[0];\n"
"	edge20.w = 0.f;//is this needed?\n"
"	float4 normal = cross3(edge10,edge20);\n"
"	normal = normalize(normal);\n"
"	float4 p1ToCenter;\n"
"	p1ToCenter = sphereCenter - vertices[0];\n"
"	\n"
"	float distanceFromPlane = dot(p1ToCenter,normal);\n"
"	if (distanceFromPlane < 0.f)\n"
"	{\n"
"		//triangle facing the other way\n"
"		distanceFromPlane *= -1.f;\n"
"		normal *= -1.f;\n"
"	}\n"
"	hitNormalWorld = normal;\n"
"	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n"
"	\n"
"	// Check for contact / intersection\n"
"	bool hasContact = false;\n"
"	float4 contactPoint;\n"
"	if (isInsideContactPlane) \n"
"	{\n"
"	\n"
"		if (pointInTriangle(vertices,&normal, &sphereCenter)) \n"
"		{\n"
"			// Inside the contact wedge - touches a point on the shell plane\n"
"			hasContact = true;\n"
"			contactPoint = sphereCenter - normal*distanceFromPlane;\n"
"			\n"
"		} else {\n"
"			// Could be inside one of the contact capsules\n"
"			float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n"
// Fix: remember the smallest squared distance seen so far. Previously every edge within the radius
// overwrote contactPoint, so near a vertex the last qualifying edge won even when an earlier edge
// was closer, giving a contact that was too shallow with a skewed normal.
"			float bestDistanceSqr = contactCapsuleRadiusSqr;\n"
"			float4 nearestOnEdge;\n"
"			int numEdges = 3;\n"
"			for (int i = 0; i < numEdges; i++) \n"
"			{\n"
"				float4 pa =vertices[i];\n"
"				float4 pb = vertices[(i+1)%3];\n"
"				float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n"
"				if (distanceSqr < bestDistanceSqr) \n"
"				{\n"
"					// Yep, we're inside a capsule; keep the closest edge point found so far\n"
"					bestDistanceSqr = distanceSqr;\n"
"					hasContact = true;\n"
"					contactPoint = nearestOnEdge;\n"
"					\n"
"				}\n"
"				\n"
"			}\n"
"		}\n"
"	}\n"
"	if (hasContact) \n"
"	{\n"
"		closestPnt = contactPoint;\n"
"		float4 contactToCenter = sphereCenter - contactPoint;\n"
"		minDist = length(contactToCenter);\n"
"		if (minDist>FLT_EPSILON)\n"
"		{\n"
"			hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n"
"			bCollide  = true;\n"
"		}\n"
"		\n"
"	}\n"
"	/////////////////////////////////////\n"
"	if (bCollide && minDist > -10000)\n"
"	{\n"
"		\n"
// The normal is negated here and again when stored below, so m_worldNormalOnB ends up as
// qtRotate(quat, hitNormalWorld) provided qtRotate is linear in its vector argument.
"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n"
"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n"
"		float actualDepth = minDist-radius;\n"
"		\n"
"		if (actualDepth<=0.f)\n"
"		{\n"
// The penetration depth travels in the w component of the contact position.
"			pOnB1.w = actualDepth;\n"
"			int dstIdx;\n"
"			\n"
"			float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n"
"			if (lenSqr>FLT_EPSILON)\n"
"			{\n"
// AppendInc runs before the capacity check, so the counter can exceed maxContactCapacity;
// contacts past the capacity are dropped.
"				AppendInc( nGlobalContactsOut, dstIdx );\n"
"			\n"
"				if (dstIdx < maxContactCapacity)\n"
"				{\n"
"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
"					c->m_worldNormalOnB = -normalOnSurfaceB1;\n"
"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
"					c->m_batchIdx = pairIndex;\n"
"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
"					c->m_worldPosB[0] = pOnB1;\n"
"					c->m_childIndexA = -1;\n"
"					c->m_childIndexB = faceIndex;\n"
"					GET_NPOINTS(*c) = 1;\n"
"				} \n"
"			}\n"
"		}\n"
"	}//if (hasCollision)\n"
"}\n"
// findConcaveSphereContactsKernel -- embedded OpenCL source (the kernel text itself is tagged work-in-progress).
// Each work-item whose id is below numConcavePairs handles one entry of concavePairs:
//   .x = body A (its shape is looked up in convexShapes/faces/indices/vertices),
//   .y = body B, .z = face index f within A's shape.
// Only pairs whose B collidable is SHAPE_SPHERE are processed; any other type falls through
// without writing anything.
// The three vertices of face f are copied into a local array and passed to
// computeContactSphereTriangle together with B's position/radius and A's pose. The sphere body
// is passed in the "A" slot and the mesh body in the "B" slot of that call.
// uniqueEdges and aabbs are never read; pairIdx and shapeIndexB are assigned but unused.
"// work-in-progress\n"
"__kernel void   findConcaveSphereContactsKernel( __global int4* concavePairs,\n"
"												__global const BodyData* rigidBodies,\n"
"												__global const btCollidableGpu* collidables,\n"
"												__global const ConvexPolyhedronCL* convexShapes, \n"
"												__global const float4* vertices,\n"
"												__global const float4* uniqueEdges,\n"
"												__global const btGpuFace* faces,\n"
"												__global const int* indices,\n"
"												__global btAabbCL* aabbs,\n"
"												__global struct b3Contact4Data* restrict globalContactsOut,\n"
"												counter32_t nGlobalContactsOut,\n"
"													int numConcavePairs, int maxContactCapacity\n"
"												)\n"
"{\n"
"	int i = get_global_id(0);\n"
"	if (i>=numConcavePairs)\n"
"		return;\n"
"	int pairIdx = i;\n"
"	int bodyIndexA = concavePairs[i].x;\n"
"	int bodyIndexB = concavePairs[i].y;\n"
"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
"	if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n"
"	{\n"
"		int f = concavePairs[i].z;\n"
"		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
"		\n"
// Gather the face's three vertices. This loop's `i` shadows the work-item index only inside the
// loop; the call further down uses the outer `i` again as the pair index.
"		float4 verticesA[3];\n"
"		for (int i=0;i<3;i++)\n"
"		{\n"
"			int index = indices[face.m_indexOffset+i];\n"
"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n"
"			verticesA[i] = vert;\n"
"		}\n"
"		float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n"
"		float sphereRadius = collidables[collidableIndexB].m_radius;\n"
"		float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n"
"		float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n"
"		computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n"
"																rigidBodies,collidables,\n"
"																verticesA,\n"
"																globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
"																spherePos,sphereRadius,convexPos,convexOrn, f);\n"
"		return;\n"
"	}\n"
"}\n"
;
